-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding #160902
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding #160902
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Ryan Cowan (HolyMolyCowMan) Changes: This change adds a new folding pattern, folding a G_FPEXT(G_FCONSTANT) to a G_FCONSTANT. To make this work on AArch64, the G_FCONSTANT should not be widened, because widening converts the G_FCONSTANT to a G_CONSTANT. Patch is 302.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160902.diff 23 Files Affected:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 204e1f6887fa2..57828a270ec00 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -694,6 +694,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
// Fold constant zero int to fp conversions.
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -712,6 +713,7 @@ def constant_fold_fp_ops : GICombineGroup<[
constant_fold_fsqrt,
constant_fold_flog2,
constant_fold_fptrunc,
+ constant_fold_fpext,
itof_const_zero_fold_si,
itof_const_zero_fold_ui
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0ebee2cfd8688..2206a558f9f4c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
Result.clearSign();
return Result;
}
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
bool Unused;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 076a6235eef0a..121ed198a5958 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -351,7 +351,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, cast_of_cast_combines,
+ [copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
buildvector_of_truncate, integer_of_truncate,
mutate_anyext_to_zext, combines_for_extload,
combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ea2196a584127..5613364626692 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({s32, s64, s128})
- .legalFor(HasFP16, {s16})
+ // Always legalize S16 to prevent G_FCONSTANT being widened to G_CONSTANT
+ .legalFor({s16, s32, s64, s128})
.clampScalar(0, MinFPScalar, s128);
// FIXME: fix moreElementsToNextPow2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76852b54..c00ce2242a888 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219dc4927e..c6df3456a8445 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07c7ede4..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
+; GISEL-NEXT: fcvt s2, h1
; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcmp s2, #0.0
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 079ff1076b110..1c4a6ab2217b0 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1469,8 +1469,9 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1526,8 +1527,9 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1583,8 +1585,9 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1639,8 +1642,9 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1713,9 +1717,10 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1771,8 +1776,9 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1828,8 +1834,9 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1885,8 +1892,9 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1941,8 +1949,9 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -2015,9 +2024,10 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536da26f26..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,17 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: fcvt s1, h0
+; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT: fcmp s1, s2
+; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fmov w9, s2
+; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe25d3b..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
}
define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
}
define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
}
define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
}
define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
}
define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
}
define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15
; CHECK-...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
f14329f
to
af4a9c6
Compare
Rebasing and updating tests now that #161205 is merged.
This change adds a new folding pattern, folding a G_FPEXT(G_FCONSTANT) to a G_FCONSTANT.
To make this work on AArch64, the G_FCONSTANT should not be widened, because widening converts the G_FCONSTANT to a G_CONSTANT. This should also fix some other floating-point combines that are affected when the G_FCONSTANT is widened because it is an fp16.